{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "07be6aa3-6636-4b57-be16-823c3907f4c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from urllib.parse import urljoin\n",
    "\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from dotenv import load_dotenv\n",
    "from IPython.display import Markdown, display, update_display\n",
    "from openai import OpenAI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e64af7b-6956-4437-ab32-857a6ea814c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load_dotenv() reads a local .env file, if one is present, into the environment.\n",
    "load_dotenv()\n",
    "api_key = os.getenv(\"OPENAI_API_KEY\")\n",
    "\n",
    "# Sanity check only: the key must be non-empty, start with the project-key\n",
    "# prefix and be longer than 10 characters. Nothing is raised on failure.\n",
    "if not (api_key and api_key.startswith('sk-proj-') and len(api_key) > 10):\n",
    "    print(\"No correct api key was found\")\n",
    "else:\n",
    "    print(\"Api key found. Good to go!\")\n",
    "\n",
    "# Model name and client used by the completion calls in this notebook.\n",
    "MODEL = \"gpt-4o-mini\"\n",
    "openai = OpenAI(api_key=api_key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4667e3ee-d5b7-42ed-99ad-5e9fa75c8660",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Request headers passed to requests.get() by GithubProfile; the User-Agent\n",
    "# value is the identification string of a desktop Chrome browser.\n",
    "headers = {\n",
    " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "42adb18b-3ec9-4700-95e4-c0041ce8f17a",
   "metadata": {},
   "outputs": [],
   "source": [
    "class GithubProfile:\n",
    "    \"\"\"Scraped snapshot of one web page: its title, visible text and link targets.\n",
    "\n",
    "    The constructor performs the HTTP request immediately and fills in\n",
    "    `url`, `body`, `title`, `text` and `links`.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, url, timeout=30):\n",
    "        \"\"\"Download `url` and extract its title, body text and hrefs.\n",
    "\n",
    "        Args:\n",
    "            url: Address of the page to fetch.\n",
    "            timeout: Seconds to wait for the server before requests gives up.\n",
    "                Without a timeout a stalled connection would block the\n",
    "                notebook indefinitely.\n",
    "        \"\"\"\n",
    "        self.url = url\n",
    "        response = requests.get(url, headers=headers, timeout=timeout)\n",
    "        self.body = response.content\n",
    "        soup = BeautifulSoup(self.body, 'html.parser')\n",
    "        # The title tag's .string can be None (empty tag or nested markup), so\n",
    "        # fall back to the placeholder in that case as well.\n",
    "        title = soup.title.string if soup.title else None\n",
    "        self.title = title if title else \"No title found\"\n",
    "        if soup.body:\n",
    "            # Remove elements that carry no readable text before extracting it.\n",
    "            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
    "                irrelevant.decompose()\n",
    "            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
    "        else:\n",
    "            self.text = \"\"\n",
    "        # Keep only anchors that actually have a non-empty href.\n",
    "        hrefs = (anchor.get(\"href\") for anchor in soup.find_all(\"a\"))\n",
    "        self.links = [href for href in hrefs if href]\n",
    "\n",
    "    def get_contents(self):\n",
    "        \"\"\"Return the title and text formatted as one prompt-ready section.\"\"\"\n",
    "        return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "661b5377-c444-45a9-9455-85f83ff525d3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scrape a sample profile page and show the hrefs collected from it.\n",
    "profile = GithubProfile(\"https://github.com/ertgl\")\n",
    "profile.links"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8f9a3c08-0db2-4baa-a8a4-f5642049a57c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# System prompt for the link-selection call. The example below must itself be\n",
    "# valid JSON, because the model is told to respond in the same shape.\n",
    "link_system_prompt = \"You are provided with a list of links found on a Github page. \\\n",
    "You are able to decide which of the links would be most relevant to include in a portfolio about the github user, \\\n",
    "such as links to an About page, or a Repositories, or Projects.\\n\"\n",
    "link_system_prompt += \"You should respond in JSON as in this example:\"\n",
    "link_system_prompt += \"\"\"\n",
    "{\n",
    "    \"links\": [\n",
    "        {\"type\": \"overview page\", \"url\": \"https://another.full.url\"},\n",
    "        {\"type\": \"repositories page\", \"url\": \"https://another.full.url?tab=repositories\"}\n",
    "    ]\n",
    "}\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30eafd50-9735-4388-9cc1-8337a00069a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the assembled link-selection system prompt for a visual check.\n",
    "print(link_system_prompt)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4dc4f366-5c00-441d-b1bd-8dda148f1ffb",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_links_user_prompt(profile):\n",
    "    \"\"\"Compose the user message asking the model to pick portfolio-worthy links.\n",
    "\n",
    "    `profile` must expose `url` (the page address) and `links` (an iterable of\n",
    "    href strings); the links are listed one per line at the end of the prompt.\n",
    "    \"\"\"\n",
    "    opening = f\"Here is the list of links on the website of {profile.url} - \"\n",
    "    instructions = \"please decide which of these are relevant web links for a portfolio about the user, respond with the full https URL in JSON format. \\\n",
    "Do not include Terms of Service, Privacy, Login, Blog or Github trending related pages.\\n\"\n",
    "    links_header = \"Links (some might be relative links):\\n\"\n",
    "    link_lines = \"\\n\".join(profile.links)\n",
    "    return opening + instructions + links_header + link_lines"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c066b2ac-5863-408e-bb42-1388d130d164",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the user prompt built from the sample profile's links.\n",
    "print(get_links_user_prompt(profile))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc0ccb95-479c-4f6e-9686-1ff38aa543fa",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_links(url):\n",
    "    \"\"\"Ask the model which links on the page at `url` belong in a portfolio.\n",
    "\n",
    "    The page is scraped, its links are sent to the chat model together with\n",
    "    the link-selection system prompt, and the JSON reply is decoded.\n",
    "\n",
    "    Returns:\n",
    "        The decoded JSON object produced by the model.\n",
    "    \"\"\"\n",
    "    scraped = GithubProfile(url)\n",
    "    messages = [\n",
    "        {\"role\": \"system\", \"content\": link_system_prompt},\n",
    "        {\"role\": \"user\", \"content\": get_links_user_prompt(scraped)},\n",
    "    ]\n",
    "    completion = openai.chat.completions.create(\n",
    "        model=MODEL,\n",
    "        messages=messages,\n",
    "        response_format={\"type\": \"json_object\"},\n",
    "    )\n",
    "    return json.loads(completion.choices[0].message.content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9f5e3b8b-398d-4e23-867e-401faca7db03",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Try the link selection on the sample profile and display the decoded JSON.\n",
    "get_links(profile.url)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b9024a4f-4038-4c0e-b0c7-74226feaccfd",
   "metadata": {},
   "source": [
    "# Second step: make the portfolio!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f9906d73-801a-4aea-b620-10ac39eaf424",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_all_details(url):\n",
    "    \"\"\"Collect the text of the profile page and of every link the model selected.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the GitHub profile landing page.\n",
    "\n",
    "    Returns:\n",
    "        One string: a \"Landing page:\" section followed by one section per\n",
    "        selected link, each headed by the link's type.\n",
    "    \"\"\"\n",
    "    result = \"Landing page:\\n\"\n",
    "    result += GithubProfile(url).get_contents()\n",
    "    links = get_links(url)\n",
    "    print(\"Found links:\", links)\n",
    "    # The model's JSON is not guaranteed to contain a \"links\" key; treat a\n",
    "    # missing key as \"nothing selected\" instead of raising KeyError.\n",
    "    for link in links.get(\"links\", []):\n",
    "        # The scraped hrefs may be relative, so resolve each one against the\n",
    "        # profile URL; urljoin leaves absolute URLs unchanged.\n",
    "        target = urljoin(url, link[\"url\"])\n",
    "        result += f\"\\n\\n{link['type']}\\n\"\n",
    "        result += GithubProfile(target).get_contents()\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02039450-7f7f-4556-8645-39cd31f30265",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print everything gathered for the sample profile (landing page plus selected links).\n",
    "print(get_all_details(\"https://github.com/ertgl\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4119b96f-0aa1-4cdb-9a09-d51b163069b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# System prompt for the portfolio-writing calls. Each continued line ends with a\n",
    "# space before the backslash so that sentences do not run together when joined.\n",
    "system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a personal github page \\\n",
    "and creates a short portfolio about the user profile, especially projects and repositories and summary of the repo's \\\n",
    "README files for prospective recruiters, investors. Respond in markdown. \\\n",
    "Include details of person profile overview, if you have the information.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "842834d2-a5e9-4b56-a792-492a1a137fbc",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_portfolio_user_prompt(profile_name, url, max_chars=5_000):\n",
    "    \"\"\"Build the user prompt for portfolio generation from the scraped pages.\n",
    "\n",
    "    Args:\n",
    "        profile_name: Display name of the GitHub user.\n",
    "        url: Address of the user's GitHub profile page.\n",
    "        max_chars: Upper bound on the prompt length; longer prompts are cut\n",
    "            off at this many characters. Pass None to disable truncation.\n",
    "\n",
    "    Returns:\n",
    "        The prompt text, truncated to `max_chars` characters.\n",
    "    \"\"\"\n",
    "    user_prompt = f\"You are looking at a user called: {profile_name} on Github.\\n\"\n",
    "    user_prompt += \"Here are the contents of its landing page and other relevant pages; use this information to build a short portfolio of the user in markdown.\\n\"\n",
    "    user_prompt += get_all_details(url)\n",
    "    # Keep the request small: everything past max_chars is dropped.\n",
    "    return user_prompt[:max_chars]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "285b3a1d-894a-463c-8c30-b5de203b8358",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the (truncated) portfolio prompt for the sample profile.\n",
    "print(get_portfolio_user_prompt(\"Ertuğrul Noyan Keremoğlu\", \"https://github.com/ertgl\"))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78dc7495-d0a5-409b-8ecf-3a5ef9220e25",
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_portfolio(profile_name, url):\n",
    "    \"\"\"Generate the portfolio in a single non-streaming call and render it as Markdown.\n",
    "\n",
    "    Args:\n",
    "        profile_name: Display name of the GitHub user.\n",
    "        url: Address of the user's GitHub profile page.\n",
    "    \"\"\"\n",
    "    messages = [\n",
    "        {\"role\": \"system\", \"content\": system_prompt},\n",
    "        {\"role\": \"user\", \"content\": get_portfolio_user_prompt(profile_name, url)},\n",
    "    ]\n",
    "    completion = openai.chat.completions.create(model=MODEL, messages=messages)\n",
    "    portfolio_md = completion.choices[0].message.content\n",
    "    display(Markdown(portfolio_md))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abe39377-2d52-434a-aace-e9397cdd4f20",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate and render the portfolio for the sample profile in one call.\n",
    "create_portfolio(\"Ertuğrul Noyan Keremoğlu\", \"https://github.com/ertgl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "edd168ca-b77b-4fc7-9e11-2114a43553e4",
   "metadata": {},
   "outputs": [],
   "source": [
    "def stream_portfolio(profile_name, url):\n",
    "    \"\"\"Stream the portfolio from the model, re-rendering one Markdown output as text arrives.\n",
    "\n",
    "    Args:\n",
    "        profile_name: Display name of the GitHub user.\n",
    "        url: Address of the user's GitHub profile page.\n",
    "    \"\"\"\n",
    "    stream = openai.chat.completions.create(\n",
    "        model=MODEL,\n",
    "        messages=[\n",
    "            {\"role\": \"system\", \"content\": system_prompt},\n",
    "            {\"role\": \"user\", \"content\": get_portfolio_user_prompt(profile_name, url)}\n",
    "          ],\n",
    "        stream=True\n",
    "    )\n",
    "\n",
    "    raw = \"\"\n",
    "    display_handle = display(Markdown(\"\"), display_id=True)\n",
    "    for chunk in stream:\n",
    "        # Guard against chunks that arrive with an empty choices list.\n",
    "        if not chunk.choices:\n",
    "            continue\n",
    "        raw += chunk.choices[0].delta.content or ''\n",
    "        # Strip only the code-fence markers. The unmodified text is kept in `raw`\n",
    "        # so a fence label split across chunks is still matched as a whole, and\n",
    "        # the word \"markdown\" inside the prose itself is left untouched.\n",
    "        cleaned = raw.replace(\"```markdown\", \"\").replace(\"```\", \"\")\n",
    "        update_display(Markdown(cleaned), display_id=display_handle.display_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1ea391d4-775e-483d-9e55-e3ae30fa9bd8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stream the portfolio for the sample profile into the cell output.\n",
    "stream_portfolio(\"Ertuğrul Noyan Keremoğlu\", \"https://github.com/ertgl\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "498ca0c8-8f68-4389-8184-078706b62cf6",
   "metadata": {},
   "source": [
    "# Multi-lingual with Multi-Tone in Desired Format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "id": "f11e3391-03f9-409c-9f5a-6286959690ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "def multi_lingual_stream_portfolio(profile_name, url, language, tone):\n",
    "    \"\"\"Stream a structured portfolio written in a chosen language and tone.\n",
    "\n",
    "    Args:\n",
    "        profile_name: Display name of the GitHub user.\n",
    "        url: Address of the user's GitHub profile page.\n",
    "        language: Language the portfolio should be written in.\n",
    "        tone: Tone description; it is lower-cased before being placed in the prompt.\n",
    "    \"\"\"\n",
    "\n",
    "    # Local system prompt (shadows the module-level one) describing the section layout.\n",
    "    system_prompt = f\"\"\"\n",
    "You are an assistant that analyzes the contents of several relevant pages from a github profile page and \n",
    "creates a visually appealing and professional short portfolio for prospective investors, and recruiters. \n",
    "The portfolio should be written in {language} and use a {tone.lower()} tone throughout.\n",
    "\n",
    "The portfolio should follow this structure (in {language}):\n",
    "\n",
    "1. **Front Cover**:\n",
    "   - Prominently display the user name as Title.\n",
    "   - Include a compelling headline or tagline.\n",
    "   - Add something engaging relevant to the user’s summarized README files if available.\n",
    "\n",
    "2. **About**:\n",
    "   - Provide a brief introduction to the user's projects approach.\n",
    "   - State which repository they own or they contributed.\n",
    "\n",
    "3. **Overview**:\n",
    "   - Summarize the user's projects, repositories, or solutions by summarized README files if available.\n",
    "   - Highlight benefits or unique developer/development points.\n",
    "   - Mention the follower and following users count and total stars they got.\n",
    "\n",
    "\n",
    "4. **My Culture**:\n",
    "   - Outline the user’s key values or guiding principles.\n",
    "   - Describe the workplace environment (e.g., innovation-driven, inclusive, collaborative).\n",
    "   - Highlight community engagement.\n",
    "\n",
    "5. **What kind of companies may be interested**:\n",
    "   - Describe the target customers or industries served.\n",
    "   - Mention open source contributions also if available. \n",
    "   \n",
    "6. **Projects**:\n",
    "    \n",
    "   ***Owner***:\n",
    "   - List owned projects/repositories with summaries. (Summarize README file of the each project)\n",
    "   \n",
    "   ***Contributer***:\n",
    "   - List contributed projects/repositories with summaries. (Summarize README file of the each project)\n",
    "\n",
    "\n",
    "7. **Support and Donation**:\n",
    "   - Encourage those interested in user's open source projects to donate.\n",
    "   - Provide direct links or steps to apply if available.\n",
    "\n",
    "8. **Contact Us**:\n",
    "   - Provide the user’s address, phone number, and email.\n",
    "   - Include links to social media platforms.\n",
    "   - Add a link to the user’s website.\n",
    "\n",
    "9. **Closing Note**:\n",
    "   - End with a thank-you message or an inspirational note for the reader.\n",
    "   - Add a call-to-action (e.g., “Get in touch today!” or “Explore more on my website”).\n",
    "\n",
    "Ensure the content is concise, engaging, visually clear, and tailored to the target audience. Use headings and subheadings to make the brochure easy to navigate. Include links and contact information wherever applicable.\n",
    "\"\"\"\n",
    "\n",
    "    stream = openai.chat.completions.create(\n",
    "        model=MODEL,\n",
    "        messages=[\n",
    "            {\"role\": \"system\", \"content\": system_prompt},\n",
    "            {\"role\": \"user\", \"content\": get_portfolio_user_prompt(profile_name, url)}\n",
    "          ],\n",
    "        stream=True\n",
    "    )\n",
    "\n",
    "    raw = \"\"\n",
    "    display_handle = display(Markdown(\"\"), display_id=True)\n",
    "    for chunk in stream:\n",
    "        # Guard against chunks that arrive with an empty choices list.\n",
    "        if not chunk.choices:\n",
    "            continue\n",
    "        raw += chunk.choices[0].delta.content or ''\n",
    "        # Strip only the code-fence markers. The unmodified text is kept in `raw`\n",
    "        # so a fence label split across chunks is still matched as a whole, and\n",
    "        # the word \"markdown\" inside the prose itself is left untouched.\n",
    "        cleaned = raw.replace(\"```markdown\", \"\").replace(\"```\", \"\")\n",
    "        update_display(Markdown(cleaned), display_id=display_handle.display_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a38dc0b-27de-4738-8883-b3857e067b45",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Stream an English portfolio for the sample profile using a mixed tone description.\n",
    "multi_lingual_stream_portfolio(\"Ertuğrul Noyan Keremoğlu\", \"https://github.com/ertgl\", \"English\", \"serious, entertaining, witty\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
